Analytic Background:
Our study data were a subset derived from the Alzheimer’s
Disease Neuroimaging Initiative (ADNI) ADNI3 wave data bank. This was
due to the harmonization of scanner sequence protocols across data
collection sites that began during this wave. We only included subjects
that had both structural MRI and PET (amyloid & tau) data collected,
followed by QA of these data. This analysis leveraged a normative model
developed by the Predictive Clinical Neuroscience Group at the Donders
Institute and Radboud UMC, which aims to predict and stratify brain
disorders on the basis of neuroimaging data. Specifically, we used
‘BLR_lifespan_57K_82sites’, which makes use of the Bayesian Linear
Regression algorithm trained on ~57,000 subjects from 82 different
collection sites, across the human lifespan.
Our analytic approach builds upon the foundational work established by Verdi et al. (2023), who utilized normative modeling techniques to delineate neuroanatomical heterogeneity in Alzheimer’s disease. We employed a similar methodological framework to extend these insights by integrating both structural MRI and PET amyloid and tau data. This integration allowed us to explore more deeply the neuroanatomical and neuropsychiatric underpinnings of Alzheimer’s disease across different phenotypes within our ADNI subset.
Limitations/Considerations:
knitr::opts_chunk$set(echo = TRUE)
# Set CRAN repository for consistent package installation, ggseg for neuroimaging visualizations
options(repos = c(
ggseg = 'https://ggseg.r-universe.dev',
CRAN = 'https://cloud.r-project.org'))
# Install and load the pacman package for efficient package management
if (!require(pacman)) install.packages("pacman")
library(pacman)
# Use p_load function to install (if necessary) and load packages
p_load(dplyr, knitr, kableExtra, psych, tidyr, readr, stringr, matrixStats, ggseg, ggseg3d, ggsegExtra, ggsegDesterieux,
cowplot, data.table, e1071, ggplot2, plot.matrix, proxy, RPMG, broom, gridExtra, patchwork, caret, tidyverse,
fastDummies, sjPlot, ggbeeswarm, lavaan, caTools, bitops, effects, ggeffects, reshape2, ggpubr)
##
## The downloaded binary packages are in
## /var/folders/bv/m7vlb1y52q99_f9x_j031q1c0000gn/T//Rtmp3bTABL/downloaded_packages
# Specify the 'base_path' where you can find your data files, ASSUMING they're in the same directory, & set it as WD
base_path <- "~/Downloads"
setwd(base_path)
# Read in foundational datasets
NIDP_DX_Dem_NP <- read_csv("NIDP_DX_Dem_NP_MRI.csv") # Data set with clinical/non-imaging variables and MRI manufacturer information per subject assuming separate from FreeSurfer dataset
ADNI_freesurfer <- read_csv("ADNI_lh_rh_thickness_subcort_volumes.csv") # Data set with FreeSurfer brain thickness values
# Merging datasets on 'Subject_ID' with only ID values present in NIDP_DX_Dem_NP
ADNI_274_Merged <- merge(NIDP_DX_Dem_NP, ADNI_freesurfer, by = "Subject_ID", all.x = TRUE)
# Save the newly merged dataset as a CSV file
write_csv(ADNI_274_Merged, "ADNI_274_Merged.csv")
### Manual edits were done to this spreadsheet to only leave FreeSurfer values and balancing variables for data split
### FreeSurfer variables were edited to match PCN template, see GUI for details
### Covariates: 'age', 'sex', 'site'
# Rename finalized, merged file to 'ADNI_274_Merged_Final.csv'
ADNI_274_Merged_Final <- read_csv("ADNI_274_Merged_Final.csv") # This should be the spreadsheet that will be split into adaptation and testing sets for normative modeling
# Create a bar plot for MRI manufacturers with counts displayed
ggplot(NIDP_DX_Dem_NP, aes(x = MRI_MANU)) +
geom_bar(stat = "count", fill = "skyblue", color = "black") +
geom_text(stat = "count", aes(label = ..count..), vjust = -0.5) +
labs(title = "Count of MRI Manufacturers", x = "MRI Manufacturer", y = "Count") +
theme_minimal()
# Create separate bar plots for each manufacturer
ggplot(NIDP_DX_Dem_NP, aes(x = MRI_MAKE, fill = MRI_MANU)) +
geom_bar(position = "dodge") +
geom_text(aes(label = ..count..), stat = "count", position = position_dodge(width = 0.9), vjust = -0.5) +
facet_wrap(~MRI_MANU, scales = "free_x") +
labs(title = "Counts of Each MRI Make Grouped by Manufacturer", x = "MRI Make", y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels for better visibility
# Counting subjects by BioMarkers within each site
bio_markers_site_counts <- ADNI_274_Merged_Final %>%
group_by(site, BioMarkers) %>%
dplyr::summarise(Count = n(), .groups = 'drop')
# Plot with Site on x-axis
plot_site_x <- ggplot(bio_markers_site_counts, aes(x = site, y = Count, fill = BioMarkers)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_text(aes(label = Count), vjust = -0.5, position = position_dodge(width = 0.9)) +
labs(title = "Subject Counts by BioMarkers Across Sites",
x = "Site",
y = "Number of Subjects",
fill = "BioMarkers") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 0, hjust = 1))
# Plot with BioMarkers on x-axis
plot_biomarkers_x <- ggplot(bio_markers_site_counts, aes(x = BioMarkers, y = Count, fill = factor(site))) +
geom_bar(stat = "identity", position = position_dodge()) + # Use position_dodge() for proper grouping
geom_text(aes(label = Count), vjust = -0.5, position = position_dodge(width = 0.9)) + # Adjust text positioning
labs(title = "Subject Counts Across BioMarkers by Site",
x = "BioMarkers",
y = "Number of Subjects",
fill = "Site") +
scale_fill_brewer(palette = "Set1") + # Using a qualitative palette for distinct sites
theme_minimal() +
theme(axis.text.x = element_text(angle = 0, hjust = 1)) # Improve readability of x-axis labels
# Optionally, print both plots to view them in the R environment
print(plot_site_x)
print(plot_biomarkers_x)
# Separate the data based on 'BioMarkers'
adaptation_data <- ADNI_274_Merged_Final %>%
filter(BioMarkers %in% c("A-C-", "A+C-"))
testing_data <- ADNI_274_Merged_Final %>%
filter(BioMarkers %in% c("A-C+", "A+C+"))
# Save the adaptation and testing datasets as CSV files
write_csv(adaptation_data, "ADNI_175_Adaptation.csv")
write_csv(testing_data, "ADNI_99_Testing.csv")
# After verifying that the grouping variable and covariates are balanced as desired (i.e., see the next chunk), you can now run them through the normative model computation via the GUI or recreate their code yourself!
# Plotting the distribution of 'BioMarkers' in both datasets
biomarker_distribution <- bind_rows(
mutate(adaptation_data, dataset = "Adaptation"),
mutate(testing_data, dataset = "Testing")
) %>%
group_by(dataset, BioMarkers) %>%
dplyr::summarise(Count = n(), .groups = 'drop') %>%
group_by(dataset) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
ggplot(biomarker_distribution, aes(x = BioMarkers, y = Percentage, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge()) +
labs(title = "Distribution of Biomarker", x = "Biomarker", y = "Percentage (%)") +
theme_minimal()
# Plotting the distribution of 'site' in both datasets
site_distribution <- bind_rows(
mutate(adaptation_data, dataset = "Adaptation"),
mutate(testing_data, dataset = "Testing")
) %>%
group_by(dataset, site) %>%
dplyr::summarise(Count = n(), .groups = 'drop') %>%
group_by(dataset) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
ggplot(site_distribution, aes(x = site, y = Percentage, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge()) +
labs(title = "Distribution of Site", x = "Site", y = "Percentage (%)") +
theme_minimal()
# Plotting the distribution of 'sex' in both datasets
sex_distribution <- bind_rows(
mutate(adaptation_data, dataset = "Adaptation"),
mutate(testing_data, dataset = "Testing")
) %>%
group_by(dataset, sex) %>%
dplyr::summarise(Count = n(), .groups = 'drop') %>%
group_by(dataset) %>%
mutate(Percentage = (Count / sum(Count)) * 100)
ggplot(sex_distribution, aes(x = sex, y = Percentage, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge()) +
labs(title = "Distribution of Sex", x = "Sex", y = "Percentage (%)") +
theme_minimal()
# Calculating and comparing the average age
age_summary <- bind_rows(
mutate(adaptation_data, dataset = "Adaptation"),
mutate(testing_data, dataset = "Testing")
) %>%
group_by(dataset) %>%
dplyr::summarise(Average_Age = mean(age, na.rm = TRUE), .groups = 'drop')
print(age_summary)
## # A tibble: 2 × 2
## dataset Average_Age
## <chr> <dbl>
## 1 Adaptation 70.1
## 2 Testing 72.1
# Rename data frame for efficient code modeling, using dataset with volumetric FreeSurfer measures for ALL subjects
df_merged <- read_csv("ADNI_274_Merged_Final.csv") # to be used ONLY for group cortical thickness comparisons
# Rename biomarker variable
df_merged <- dplyr::rename(df_merged, biomarker = "BioMarkers")
# Specifying the order of the first few columns for easier viewing
desired_order <- c("Subject_ID", "age", "sex", "site", "biomarker")
# Append the remaining column names that are not specified in desired_order
remaining_cols <- setdiff(names(df_merged), desired_order)
# Combine the specified order with the remaining columns
new_order <- c(desired_order, remaining_cols)
# Reorder the columns in df according to new_order
df_merged <- df_merged[, new_order]
# Recode biomarker variable
df_merged$biomarker <- recode(df_merged$biomarker,
"A-C-" = "HC",
"A+C-" = "HC",
"A-C+" = "MCI",
"A+C+" = "AD")
df_merged$biomarker <- as.factor(df_merged$biomarker)
df_merged$biomarker <- factor(df_merged$biomarker, levels = c("HC", "MCI", "AD")) # explicitly set the reference level
# Recode site variable
df_merged$site <- as.factor(df_merged$site)
df_merged$site <- gsub("1", "GE", df_merged$site)
df_merged$site <- gsub("2", "Philips", df_merged$site)
df_merged$site <- gsub("3", "Siemens", df_merged$site)
# Recode sex variable, 0= females 1= males
df_merged$sex <- factor(df_merged$sex)
df_merged$sex <- gsub("0", "Female", df_merged$sex)
df_merged$sex <- gsub("1", "Male", df_merged$sex)
# Read in computed deviation scores
ADNI_deviation_scores <- read_csv("ADNI_deviation_scores_BLR.csv") # This dataset contains raw subject FreeSurfer volumetric measures and their respective deviation scores for your TESTING set used in the Normative Modeling
# Use dummy_cols to create dummy variables for the 'BioMarkers' column
ADNI_deviation_scores <- dummy_cols(ADNI_deviation_scores, select_columns = "BioMarkers", remove_first_dummy = TRUE, remove_selected_columns = TRUE)
# Rename data frame for efficient code modeling, this is the data frame that will be used for all other analyses EXCEPT group cortical thickness comparisons
df <- ADNI_deviation_scores
# Omit the 'index' column from the data frame, unnecessary variable
df <- df[, -which(names(df) == "index")]
# Rename biomarker variable
df <- dplyr::rename(df, biomarker = `BioMarkers_A+C+`)
# Specifying the order of the first few columns for easier viewing
desired_order <- c("Subject_ID", "age", "sex", "site", "biomarker")
# Append the remaining column names that are not specified in desired_order
remaining_cols <- setdiff(names(df), desired_order)
# Combine the specified order with the remaining columns
new_order <- c(desired_order, remaining_cols)
# Reorder the columns in df according to new_order
df <- df[, new_order]
# List of columns to exclude from the ROI list
desired_order <- c("Subject_ID", "age", "sex", "site", "biomarker")
# Extract column names, remove those in desired_order, and remove z-score suffixes
ROI <- names(df)[!names(df) %in% desired_order & !grepl("_Z_predict", names(df))]
# Further clean the ROI names to ensure no duplicates from z-score names
ROI <- unique(gsub("_Z_predict", "", ROI))
# Recode biomarker variable
df$biomarker <- as.factor(df$biomarker)
df$biomarker <- gsub("0", "MCI", df$biomarker)
df$biomarker <- gsub("1", "AD", df$biomarker)
df$biomarker <- factor(df$biomarker, levels = c("MCI", "AD")) # explicitly set the reference level
# Recode site variable
df$site <- as.factor(df$site)
df$site <- gsub("1", "GE", df$site)
df$site <- gsub("2", "Philips", df$site)
df$site <- gsub("3", "Siemens", df$site)
# Recode sex variable, 0= females 1= males
df$sex <- factor(df$sex)
df$sex <- gsub("0", "Female", df$sex)
df$sex <- gsub("1", "Male", df$sex)
# Read in Destrieux atlas data
data("desterieux", package = "ggseg") # Note: the ggseg package adds an extra 'e' in the spelling of the name Destrieux, so it's NOT a typo
# Assign atlas data to data frames in local environment
desterieux_dims <- desterieux$data
desterieux <- desterieux
# Plot simple atlas features to test data frame with dimension data
plot(desterieux_dims) +
theme(legend.position = "bottom",
legend.text = element_text(size = 7)) +
guides(fill = guide_legend(ncol = 3))
## NULL
# Plot atlas ROIs with labels to test data frame with complete vectors
ggplot() +
geom_brain(atlas = desterieux)
# Count of each variable of interest
table(df_merged$biomarker)
##
## HC MCI AD
## 175 40 59
table(df_merged$sex)
##
## Female Male
## 153 121
table(df_merged$site)
##
## GE Philips Siemens
## 49 48 177
# Cross-Tabulation of average thickness and biomarker group
describeBy(df_merged$Mean_Thickness, df_merged$biomarker)
##
## Descriptive statistics by group
## group: HC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 175 2.43 0.08 2.43 2.43 0.07 2.17 2.65 0.48 -0.15 0.12 0.01
## ------------------------------------------------------------
## group: MCI
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 40 2.4 0.08 2.41 2.4 0.07 2.24 2.59 0.35 -0.1 -0.28 0.01
## ------------------------------------------------------------
## group: AD
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 59 2.35 0.09 2.34 2.35 0.1 2.16 2.55 0.39 -0.08 -0.83 0.01
d<-(describeBy(df_merged$Mean_Thickness, df_merged$biomarker))
# Cross-Tabulation of age and biomarker group
describeBy(df_merged$age, df_merged$biomarker)
##
## Descriptive statistics by group
## group: HC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 175 70.1 5.94 69 69.81 5.93 55 90 35 0.53 0.66 0.45
## ------------------------------------------------------------
## group: MCI
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 40 71.97 8.81 72 72.16 8.9 56 88 32 -0.22 -0.87 1.39
## ------------------------------------------------------------
## group: AD
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 59 72.25 7.73 73 72.43 7.41 55 89 34 -0.19 -0.33 1.01
mean(df$age)
## [1] 72.14141
sd(df$age)
## [1] 8.13911
# Generate the violin plot of age stratified by phenotype categories
ggplot(df_merged, aes(x = biomarker, y = age, fill = biomarker)) +
geom_violin() +
labs(title = "Distribution of Age Stratified by Phenotype Groups",
x = "Phenotype Group", y = "Age") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 0, hjust = 1)) # Improve readability of x-axis labels
# Cross-Tabulation of sex & biomarker, visualization
ggplot(df_merged, aes(x = biomarker, fill = sex)) +
geom_bar(position = "dodge") +
labs(title = "Distribution of Sex Across Phenotype Groups",
x = "Phenotype Group",
y = "Count",
fill = "Sex") +
theme_minimal()
# Create the summary Mean_Thickness data frame
filtered_MT_summary <- df_merged %>%
group_by(biomarker) %>%
dplyr::summarise(
score_mean = mean(Mean_Thickness, na.rm = TRUE),
n = n(), # Sample size for each group
sem = sd(Mean_Thickness, na.rm = TRUE) / sqrt(n()),
.groups = 'drop'
)
# Create the rain cloud plot with separate components
ggplot(df_merged, aes(x = biomarker, y = Mean_Thickness, fill = biomarker, color = biomarker)) +
PupillometryR::geom_flat_violin(adjust = 1.5, trim = FALSE, alpha = 0.5, position = position_nudge(x = 0.2, y = 0), colour = NA) +
geom_point(aes(x = as.numeric(biomarker)-0.25, y = Mean_Thickness, colour = biomarker), position = position_jitter(width = .05), size = .5, shape = 20) +
geom_boxplot(outlier.shape = NA, alpha = 0.5, width = 0.25, position = position_dodge(width = 0.3), colour = "black") +
geom_point(data = filtered_MT_summary, aes(x = factor(biomarker), y = score_mean), shape = 18, position = position_nudge(x = 0.2)) +
geom_errorbar(data = filtered_MT_summary, aes(x = factor(biomarker), y = score_mean, ymin = score_mean - sem, ymax = score_mean + sem), width = 0.05, position = position_nudge(x = 0.2)) +
labs(
title = "Distribution of Mean Cortical Thickness Stratified by Phenotype Groups",
y = "Score",
x = "Biomarker",
fill = "Phenotype Group", # Legend title for fill
color = "Phenotype Group" # Legend title for color
) +
theme_minimal() +
theme(
legend.position = "right",
legend.title = element_text(face = "bold") # Make legend title bold
)
# Fit a linear model of Mean_Thickness as a function of biomarker and store it in s_2
s_2 <- lm(Mean_Thickness ~ biomarker, data = df_merged)
# Create a table for the linear model with confidence intervals
tab_model(s_2, title = "Linear Model: Average Cortical Thickness as a Function of Biomarker")
| Â | Mean Thickness | ||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 2.43 | 2.42 – 2.44 | <0.001 |
| biomarker [MCI] | -0.03 | -0.06 – 0.00 | 0.079 |
| biomarker [AD] | -0.08 | -0.10 – -0.05 | <0.001 |
| Observations | 274 | ||
| R2 / R2 adjusted | 0.126 / 0.119 | ||
# Perform Tukey's Honest Significant Difference test and store results
tukey_results_s2 <- TukeyHSD(aov(Mean_Thickness ~ biomarker, data = df_merged))
print(tukey_results_s2)
Tukey multiple comparisons of means 95% family-wise confidence level
Fit: aov(formula = Mean_Thickness ~ biomarker, data = df_merged)
$biomarker diff lwr upr p adj MCI-HC -0.02631591 -0.06143749 0.008805661 0.1831271 AD-HC -0.07969303 -0.10986243 -0.049523633 0.0000000 AD-MCI -0.05337712 -0.09442260 -0.012331635 0.0067581
tukey_data_s2 <- as.data.frame(tukey_results_s2$biomarker)
tukey_data_s2$Comparison <- rownames(tukey_data_s2)
tukey_data_s2$significant <- ifelse(tukey_data_s2$`p adj` < 0.05, "Significant", "Not Significant")
# Creating the plot of the Tukey HSD test with significance indication
ggplot(tukey_data_s2, aes(y = Comparison, xmin = lwr, xmax = upr, x = diff)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
geom_errorbarh(aes(height = 0.2, color = significant)) +
geom_point(aes(x = diff, color = significant), size = 2) +
scale_color_manual(values = c("Significant" = "red", "Not Significant" = "blue")) +
labs(title = "Tukey HSD Test Results for Mean Cortical Thickness by Biomarker",
x = "Differences in Mean Levels of Biomarker",
y = "Comparison",
color = "P-value Significance") +
theme_minimal()
# Fit a linear model of Mean_Thickness as a function of biomarker with added covariates
s_1<-lm(Mean_Thickness ~ biomarker + age + sex, data = df_merged)
# Create a table for the linear model including covariates with confidence intervals
tab_model(s_1, title = "Enhanced Linear Model: Average Cortical Thickness as a Function of Biomarker, Adjusted for Age and Sex")
| Â | Mean Thickness | ||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 2.58 | 2.48 – 2.68 | <0.001 |
| biomarker [MCI] | -0.02 | -0.05 – 0.01 | 0.221 |
| biomarker [AD] | -0.07 | -0.10 – -0.05 | <0.001 |
| age | -0.00 | -0.00 – -0.00 | 0.007 |
| sex [Male] | -0.02 | -0.04 – 0.00 | 0.119 |
| Observations | 274 | ||
| R2 / R2 adjusted | 0.159 / 0.147 | ||
# Plot multiple regression model
ggpredict(s_1, c(terms = "age", "biomarker", "sex")) |>
plot() +
labs(title = "Average Cortical Thickness as a Function of Biomarker, Adjusted for Age and Sex",
x = "Age",
y = "Mean Cortical Thickness (mm)",
caption = "Note: 'biomarker' factors, 'HC' = Healthy Control; 'MCI' = Mild Cognitive Impairment; & 'AD' = Alzheimer's Disease.")
# Apply suffix to all ROI FreeSurfer measures
df.rois <- df_merged %>% rename_at(vars((6:153)), ~ paste0(., '_rois'))
# Run ANOVA analyses for each ROI across biomarker group and extract just the p-values
df.stats <- as.data.frame(sapply(X = df.rois[,grep("_rois", names(df.rois),value = T)], FUN = function(x) summary(aov(x ~ df.rois$biomarker))[[1]][["Pr(>F)"]][1]))
# Rename columns in ANOVA output data frame
names(df.stats) <- "p_value"
setDT(df.stats, keep.rownames = "ROI")
# Verify that changes were made via the first 20 ROIs
head(df.stats, 20)
# Run False Discovery Rate (FDR) corrections
df.stats <- cbind(df.stats, p.adjust(df.stats$p_value), method = "fdr")
# Rename column of FDR-corrected p-values
names(df.stats)[3] <- "FDR.pvalue"
# Verify that changes were made
head(df.stats, 20)
### Repeat steps to now obtain the F-stat values
# Run ANOVA analyses for each ROI across biomarker group and extract just the F-stat values
df.f_stats <- as.data.frame(sapply(X = df.rois[,grep("_rois", names(df.rois),value = T)], FUN = function(x) summary(aov(x ~ df.rois$biomarker))[[1]][["F value"]][1]))
# Rename columns in ANOVA output data frame
names(df.f_stats) <- "f_stat"
setDT(df.f_stats, keep.rownames = "ROI")
# Verify that changes were made via the first 20 ROIs
head(df.f_stats, 20)
# List the ROIs that are significant after FDR correction
df.stats[which(df.stats$FDR.pvalue <0.05),]
# Assign significance levels
df.stats$Significance <- ifelse(df.stats$FDR.pvalue < 0.001, '***',
ifelse(df.stats$FDR.pvalue < 0.01, '**',
ifelse(df.stats$FDR.pvalue < 0.05, '*', '')))
# Merge data frames for visualization
df.stats_merged <- merge(df.stats, df.f_stats, by = "ROI")
# Melt the data for plotting
df.melted <- melt(df.stats_merged, id.vars = "ROI", variable.name = "Statistic", value.name = "Value")
# Ensure that 'Value' is numeric
df.melted$Value <- as.numeric(as.character(df.melted$Value))
# Ensure df.stats and df.melted are merged to filter and sort
df.melted <- df.melted %>%
filter(Statistic == "f_stat") %>%
left_join(df.stats %>% select(ROI, Significance, FDR.pvalue), by = "ROI") %>%
filter(FDR.pvalue < 0.05) %>%
arrange(match(Significance, c("", "*", "**", "***"))) # Order by significance levels
# Remove suffix from ROI list
df.melted$ROI <- gsub("_rois", "", df.melted$ROI)
# Define group positions and labels
group_positions <- data.frame(
start = c(1, 15, 30), # example start positions
end = c(14, 29, 51), # example end positions
label = c("*", "**", "***") # example labels for significance
)
# Create the heatmap
heatmap_plot <- ggplot(df.melted, aes(x = ROI, y = Statistic, fill = Value)) +
geom_tile() +
scale_fill_gradient(low = "blue", high = "red") +
labs(title = "Heatmap of ANOVA Statistics Across ROIs", x = "Region of Interest", y = "F-Statistic") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Add custom brackets using annotations
for (i in 1:nrow(group_positions)) {
heatmap_plot <- heatmap_plot +
annotate("segment", x = group_positions$start[i], xend = group_positions$end[i], y = Inf, yend = Inf, yjust = 1.5, color = "black", size = 0.5) +
annotate("text", x = (group_positions$start[i] + group_positions$end[i]) / 2, y = Inf, label = group_positions$label[i], vjust = 2, hjust = 0.5)
}
print(heatmap_plot)
ggsave("~/Downloads/ANOVA_heatmap.pdf")
# Remove suffix from ROI list
df.stats$ROI <- as.data.frame(gsub("_rois", "", df.stats$ROI))
### Rename ROIs to assign nomenclature consistent with the 'desterieux' atlas package
# Left hemisphere ROIs
left.df.stats <- df.stats %>%
filter(str_detect(ROI, "L_"))
x <- gsub("L_", "", left.df.stats$ROI)
y <- gsub("G&S_", "G and S ", x)
z <- gsub("G_", "G ", y)
z1 <- gsub("S_", "S ", z)
z2 <- gsub("_", " ", z1)
z3 <- gsub(" bin", "", z2)
z4 <- gsub("\\.", " ", z3)
z5 <- gsub("cingul ", "cingul-", z4)
z6 <- gsub("Mid ", "Mid-", z5)
z7 <- gsub("Post ", "Post-", z6)
z8 <- gsub("inf ", "inf-", z7)
z9 <- gsub("med ", "med-", z8)
z10 <- gsub("sup ", "sup-", z9)
z11 <- gsub("Fis ant ", "Fis-ant-", z10)
z12 <- gsub("precentral ", "precentral-", z11)
z13 <- gsub("Fis pos ", "Fis-pos-", z12)
z14 <- gsub("lg&S", "lg and S", z13)
z15 <- gsub("oc temp", "oc-temp", z14)
z16 <- gsub("sup and transversal", "sup-transversal", z15)
z17 <- gsub("orbital H Shaped", "orbital-H Shaped", z16)
z18 <- gsub("oc sup&transversal", "oc sup and transversal", z17)
z19 <- gsub("prim Jensen", "prim-Jensen", z18)
z20 <- gsub("S oc-temp med&Lingual", "S oc-temp med and Lingual", z19)
z21 <- gsub("lat fusifor", "lat-fusifor", z20)
z22 <- gsub("middle&Lunatus", "middle and Lunatus", z21)
z23 <- gsub("intrapariet&P trans", "intrapariet and P trans", z22)
renamed_ROIs <- gsub("Lat Fis post", "Lat Fis-post", z23)
desterieux_ROIs <- as.data.frame(desterieux_dims %>% filter(hemi == "left"))$region
compare_lists <- cbind(sort(renamed_ROIs), sort(unique(desterieux_ROIs)))
list_matches <- compare_lists[,1] %in% compare_lists[,2]
compare_lists[!list_matches,]
## [,1] [,2]
## if no mismatches, than add to data.frame as region
left.df.stats$region <- renamed_ROIs
# Right hemisphere ROIs
right.df.stats <- df.stats %>%
filter(str_detect(ROI, "R_"))
x <- gsub("R_", "", right.df.stats$ROI)
y <- gsub("G&S_", "G and S ", x)
z <- gsub("G_", "G ", y)
z1 <- gsub("S_", "S ", z)
z2 <- gsub("_", " ", z1)
z3 <- gsub(" bin", "", z2)
z4 <- gsub("\\.", " ", z3)
z5 <- gsub("cingul ", "cingul-", z4)
z6 <- gsub("Mid ", "Mid-", z5)
z7 <- gsub("Post ", "Post-", z6)
z8 <- gsub("inf ", "inf-", z7)
z9 <- gsub("med ", "med-", z8)
z10 <- gsub("sup ", "sup-", z9)
z11 <- gsub("Fis ant ", "Fis-ant-", z10)
z12 <- gsub("precentral ", "precentral-", z11)
z13 <- gsub("Fis pos ", "Fis-pos-", z12)
z14 <- gsub("lg&S", "lg and S", z13)
z15 <- gsub("oc temp", "oc-temp", z14)
z16 <- gsub("sup and transversal", "sup-transversal", z15)
z17 <- gsub("orbital H Shaped", "orbital-H Shaped", z16)
z18 <- gsub("oc sup&transversal", "oc sup and transversal", z17)
z19 <- gsub("prim Jensen", "prim-Jensen", z18)
z20 <- gsub("S oc-temp med&Lingual", "S oc-temp med and Lingual", z19)
z21 <- gsub("lat fusifor", "lat-fusifor", z20)
z22 <- gsub("middle&Lunatus", "middle and Lunatus", z21)
z23 <- gsub("intrapariet&P trans", "intrapariet and P trans", z22)
renamed_ROIs <- gsub("Lat Fis post", "Lat Fis-post", z23)
desterieux_ROIs <- as.data.frame(desterieux_dims %>% filter(hemi == "right"))$region
compare_lists <- cbind(sort(renamed_ROIs), sort(unique(desterieux_ROIs)))
list_matches <- compare_lists[,1] %in% compare_lists[,2]
compare_lists[!list_matches,]
## [,1] [,2]
## if no mismatches, than add to data.frame as region
right.df.stats$region <- renamed_ROIs
# Left hemisphere
left_pvalues <- ggseg(.data=left.df.stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "left", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
# Right hemisphere
right_pvalues <- ggseg(.data=right.df.stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "right", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
# Add titles to individual plots
left_pvalues <- left_pvalues + labs(title = "Between-Group Comparisons")
right_pvalues <- right_pvalues + labs(title = "Between-Group Comparisons")
cowplot::plot_grid(left_pvalues,right_pvalues, nrow = 2, labels = "AUTO")
ggsave("~/Downloads/corticalthicknesspvalue_maps.pdf")
# Data frame preparation
df.CA <- df_merged[grep("HC|AD", df_merged$biomarker), ] # Subset just Controls and Alzheimer's ("CA")
df.CM <- df_merged[grep("HC|MCI", df_merged$biomarker), ] # Subset just Controls and MCI ("CM")
df.MA <- df_merged[grep("MCI|AD", df_merged$biomarker), ] # Subset just MCI and Alzheimer's ("MA")
### Control vs Alzheimer's t-test
# Perform t-test and extract t-stat value, p-value, and parameters
df.CA_stats <- as.data.frame(sapply(df.CA[6:153], function(x) t.test(x ~ df.CA$biomarker)$statistic))
df.CA_stats1 <- as.data.frame(sapply(df.CA[6:153], function(x) t.test(x ~ df.CA$biomarker)$p.value))
df.CA_stats2 <- as.data.frame(sapply(df.CA[6:153], function(x) t.test(x ~ df.CA$biomarker)$parameter))
# Merge data frames and remove the subsequent, intermediate data frames
df.CA_stats <- cbind(df.CA_stats, df.CA_stats1)
df.CA_stats <- cbind(df.CA_stats, df.CA_stats2)
rm(df.CA_stats1, df.CA_stats2)
# Clean up the t-test data frame
df.CA_stats$t_statistic <- df.CA_stats[2] # Rename column
names(df.CA_stats) <- c('statistic', 'p.value', 'parameter')
df.CA_stats[4] <- NULL
# Perform FDR correction on t-test data frame
df.CA_stats <- cbind(df.CA_stats, p.adjust(df.CA_stats$p.value), method = "fdr")
names(df.CA_stats)[4] <- "FDR.pvalue"
df.CA_stats[5] <- NULL
# Clean up the FDR-corrected data frame
df.CA_stats <- tibble::rownames_to_column(df.CA_stats, "ROI") # Create ROI column
df.CA_stats$ROI <- gsub("\\.t$", "", df.CA_stats$ROI)
# List the ROIs that are significant after FDR correction
df.CA_stats_sig <- df.CA_stats[which(df.CA_stats$FDR.pvalue <0.05),]
### Control vs MCI t-test
# Perform t-test and extract t-stat value, p-value, and parameters
df.CM_stats <- as.data.frame(sapply(df.CM[6:153], function(x) t.test(x ~ df.CM$biomarker)$statistic))
df.CM_stats1 <- as.data.frame(sapply(df.CM[6:153], function(x) t.test(x ~ df.CM$biomarker)$p.value))
df.CM_stats2 <- as.data.frame(sapply(df.CM[6:153], function(x) t.test(x ~ df.CM$biomarker)$parameter))
# Merge data frames and remove the subsequent, intermediate data frames
df.CM_stats <- cbind(df.CM_stats, df.CM_stats1)
df.CM_stats <- cbind(df.CM_stats, df.CM_stats2)
rm(df.CM_stats1, df.CM_stats2)
# Clean up the t-test data frame
df.CM_stats$t_statistic <- df.CM_stats[2] # Rename column
names(df.CM_stats) <- c('statistic', 'p.value', 'parameter')
df.CM_stats[4] <- NULL
# Perform FDR correction on t-test data frame
df.CM_stats <- cbind(df.CM_stats, p.adjust(df.CM_stats$p.value), method = "fdr")
names(df.CM_stats)[4] <- "FDR.pvalue"
df.CM_stats[5] <- NULL
# Clean up the FDR-corrected data frame
df.CM_stats <- tibble::rownames_to_column(df.CM_stats, "ROI") # Create ROI column
df.CM_stats$ROI <- gsub("\\.t$", "", df.CM_stats$ROI)
# List the ROIs that are significant after FDR correction
df.CM_stats_sig <- df.CM_stats[which(df.CM_stats$FDR.pvalue <0.05),]
### MCI vs Alzheimer's t-test
# Perform t-test and extract t-stat value, p-value, and parameters
df.MA_stats <- as.data.frame(sapply(df.MA[6:153], function(x) t.test(x ~ df.MA$biomarker)$statistic))
df.MA_stats1 <- as.data.frame(sapply(df.MA[6:153], function(x) t.test(x ~ df.MA$biomarker)$p.value))
df.MA_stats2 <- as.data.frame(sapply(df.MA[6:153], function(x) t.test(x ~ df.MA$biomarker)$parameter))
# Merge data frames and remove the subsequent, intermediate data frames
df.MA_stats <- cbind(df.MA_stats, df.MA_stats1)
df.MA_stats <- cbind(df.MA_stats, df.MA_stats2)
rm(df.MA_stats1, df.MA_stats2)
# Clean up the t-test data frame
df.MA_stats$t_statistic <- df.MA_stats[2] # Rename column
names(df.MA_stats) <- c('statistic', 'p.value', 'parameter')
df.MA_stats[4] <- NULL
# Perform FDR correction on t-test data frame
df.MA_stats <- cbind(df.MA_stats, p.adjust(df.MA_stats$p.value), method = "fdr")
names(df.MA_stats)[4] <- "FDR.pvalue"
df.MA_stats[5] <- NULL
# Clean up the FDR-corrected data frame
df.MA_stats <- tibble::rownames_to_column(df.MA_stats, "ROI") # Create ROI column
df.MA_stats$ROI <- gsub("\\.t$", "", df.MA_stats$ROI)
# List the ROIs that are significant after FDR correction
df.MA_stats_sig <- df.MA_stats[which(df.MA_stats$FDR.pvalue <0.05),]
# Combine the significant results for easy plotting
combined_stats_sig <- bind_rows(
df.CA_stats_sig %>% mutate(Comparison = "HC vs AD"),
df.MA_stats_sig %>% mutate(Comparison = "MCI vs AD")
)
# Assign significance levels
combined_stats_sig$Significance <- ifelse(combined_stats_sig$FDR.pvalue < 0.001, '***',
ifelse(combined_stats_sig$FDR.pvalue < 0.01, '**',
ifelse(combined_stats_sig$FDR.pvalue < 0.05, '*', '')))
# Create the lollipop plot
ggplot(combined_stats_sig, aes(x = reorder(ROI, statistic), y = statistic, color = Comparison)) +
geom_segment(aes(yend = 0, xend = ROI), size = 1, linetype = "dashed") +
geom_point(size = 3) +
labs(
title = "Significant Differences in Brain Regions Across Comparisons",
x = "Region of Interest",
y = "T-Statistic Value",
color = "Comparison",
caption = "Note: 'HC' = Healthy Control; 'MCI' = Mild Cognitive Impairment; & 'AD' = Alzheimer's Disease.") +
theme_minimal() +
theme(
legend.position = "top",
axis.text.x = element_text(angle = 90, hjust = 1, size = 7.5) # Rotate x labels for better visibility
)
ggsave("~/Downloads/lollipop_plot.pdf")
### Rename ROIs to assign nomenclature consistent with the 'desterieux' atlas package
# Automated loop version
rename_rois <- function(df, hemi, desterieux_dims) {
# Filter based on hemisphere
hemi_prefix <- ifelse(hemi == "left", "L_", "R_")
df_filtered <- df %>%
filter(str_detect(ROI, hemi_prefix))
# Perform renaming steps
x <- gsub(paste0(hemi_prefix), "", df_filtered$ROI)
patterns <- c("G&S_", "G_", "S_", "_", " bin", "\\.", "cingul ", "Mid ", "Post ",
"inf ", "med ", "sup ", "Fis ant ", "precentral ", "Fis pos ", "lg&S",
"oc temp", "sup and transversal", "orbital H Shaped", "oc sup&transversal",
"prim Jensen", "S oc-temp med&Lingual", "lat fusifor", "middle&Lunatus",
"intrapariet&P trans", "Lat Fis post")
replacements <- c("G and S ", "G ", "S ", " ", "", " ", "cingul-", "Mid-", "Post-",
"inf-", "med-", "sup-", "Fis-ant-", "precentral-", "Fis-pos-",
"lg and S", "oc-temp", "sup-transversal", "orbital-H Shaped",
"oc sup and transversal", "prim-Jensen", "S oc-temp med and Lingual",
"lat-fusifor", "middle and Lunatus", "intrapariet and P trans",
"Lat Fis-post")
for (i in seq_along(patterns)) {
x <- gsub(patterns[i], replacements[i], x)
}
# Match with desterieux ROIs
desterieux_ROIs <- as.data.frame(desterieux_dims %>% filter(hemi == hemi))$region
compare_lists <- cbind(sort(x), sort(unique(desterieux_ROIs)))
list_matches <- compare_lists[,1] %in% compare_lists[,2]
if (any(!list_matches)) {
warning("There are mismatches in ROI names for the ", hemi, " hemisphere.")
}
df_filtered$region <- x
return(df_filtered)
}
# Apply the function to each dataset and hemisphere
left.df.CA_stats <- rename_rois(df.CA_stats, "left", desterieux_dims)
right.df.CA_stats <- rename_rois(df.CA_stats, "right", desterieux_dims)
left.df.CM_stats <- rename_rois(df.CM_stats, "left", desterieux_dims)
right.df.CM_stats <- rename_rois(df.CM_stats, "right", desterieux_dims)
left.df.MA_stats <- rename_rois(df.MA_stats, "left", desterieux_dims)
right.df.MA_stats <- rename_rois(df.MA_stats, "right", desterieux_dims)
### Control vs Alzheimer's
# Left hemisphere
left_CA_pvalues <- ggseg(.data=left.df.CA_stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "left", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
# Right hemisphere
right_CA_pvalues <- ggseg(.data=right.df.CA_stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "right", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
### Control vs MCI
# Left hemisphere
left_CM_pvalues <- ggseg(.data=left.df.CM_stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "left", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
# Right hemisphere
right_CM_pvalues <- ggseg(.data=right.df.CM_stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "right", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
### MCI vs Alzheimer's
# Left hemisphere
left_MA_pvalues <- ggseg(.data=left.df.MA_stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "left", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
# Right hemisphere
right_MA_pvalues <- ggseg(.data=right.df.MA_stats, atlas = desterieux, mapping=aes(fill=FDR.pvalue), hemisphere = "right", colour = "white", size = 0.2) +
scale_fill_gradientn(limits = c(0,0.05), colours = rainbow.colors(5))
# Add titles to individual plots
left_CA_pvalues <- left_CA_pvalues + labs(title = "Healthy Controls vs Alzheimer's Group")
right_CA_pvalues <- right_CA_pvalues + labs(title = "Healthy Controls vs Alzheimer's Group")
left_CM_pvalues <- left_CM_pvalues + labs(title = "Healthy Controls vs MCI Group")
right_CM_pvalues <- right_CM_pvalues + labs(title = "Healthy Controls vs MCI Group")
left_MA_pvalues <- left_MA_pvalues + labs(title = "MCI Group vs Alzheimer's Group")
right_MA_pvalues <- right_MA_pvalues + labs(title = "MCI Group vs Alzheimer's Group")
# Print plots to verify that they were correctly constructed
cowplot::plot_grid(left_CA_pvalues, right_CA_pvalues, nrow = 2)
cowplot::plot_grid(left_CM_pvalues, right_CM_pvalues, nrow = 2)
cowplot::plot_grid(left_MA_pvalues, right_MA_pvalues, nrow = 2)
### After verifying
# Start the PDF device
pdf("Pairwise Cortical Thickness p-Value Maps.pdf", width = 11, height = 8.5)
# Lay out plots to be inserted and downloaded into a separate PDF file
cowplot::plot_grid(left_CA_pvalues, right_CA_pvalues, nrow = 2)
cowplot::plot_grid(left_CM_pvalues, right_CM_pvalues, nrow = 2)
cowplot::plot_grid(left_MA_pvalues, right_MA_pvalues, nrow = 2)
# Close the PDF device
dev.off()
## quartz_off_screen
## 2